#!/usr/bin/Rscript
##########################################################################################
# Issue Ownership and Agenda Setting in the 2019 Swiss National Elections
##########################################################################################
# Description:
# VAR Model Diagnostic
##########################################################################################
# Content
##########################################################################################
# 1) Dependencies
# 2) Load Data
# 3) Heteroscedasticity
# 4) Normality of the distribution
# 5) Stability of the distribution
##########################################################################################
# 1) Dependencies
##########################################################################################
library(tidyr)
library(dplyr)
library(vars)
library(boot)
library(rio)
library(ggplot2)
library(forecast)
library(urca)
library(tseries)
##########################################################################################
# 2) Load Data
##########################################################################################
rm(list=ls())
# - set dir
args = commandArgs()

scriptName = args[substr(args,1,7) == '--file=']

if (length(scriptName) == 0) {
  scriptName <- rstudioapi::getSourceEditorContext()$path
} else {
  scriptName <- substr(scriptName, 8, nchar(scriptName))
}

pathName = substr(
  scriptName, 
  1, 
  nchar(scriptName) - nchar(strsplit(scriptName, '.*[/|\\]')[[1]][2])
)

setwd(pathName)
parent_path <- getwd()

# - load VAR models
var_model_merged <- readRDS("../var/var_model_all_big_jan.RDS")
var_irfs_cum_merged <- readRDS("../var/var_irfs_main_all_big_jan.RDS")

data <- readRDS("../data/main_data_for_paper_all_topics_sep_party_19jan.RDS") %>% filter(selectsclass %in% c("Environment_Energy","EU_Europa","GenderIssues_Discrimination","Immigration_Asylum"))
##########################################################################################
# 3) Stationarity Test 
##########################################################################################
# make ts-object for variables of interest
topic <- unique(data$selectsclass)
depvars <- c("Media_SMD","Party_BDP_TW","Party_CVP_TW","Party_FDP_TW","Party_GLP_TW","Party_Grüne_TW","Party_SP_TW","Party_SVP_TW")
for(i in topic){
  tmp <- data %>% filter(selectsclass == paste0(i))
  for(k in depvars){
    y1 <- data %>% dplyr::select(all_of(k))
    
    x <- y1[,k]
    # - for some groups the last couple observations for each issues are NA,
    #     making these a 0 
    x[which(is.na(x)),] <- 0.01
    # - adding 1 percentage point to avoid 0s before the logit transformation
    #x <- x + 0.01
    # - applying the non-linear transformation
    logit_x <- log(x / (1-x))
    # If a topic makes up 100 % on a day transform the inf value to 11.51292 which equals 99 %
    logit_x[mapply(is.infinite, logit_x)] <- 4.59512
    y1[,k] <- logit_x
    
    y1 <- ts(y1, start = c(2019,1,1), frequency = nrow(tmp))
    cat(paste0("Topic: ", i, " and Variable: ", k, "\n"))
    print(adf.test(y1, k = 7))
  }
  
}

# if p-value smaller than 0.05 data is stationary which is good. This is th case for all vars and topics
##########################################################################################
# 4) Heteroscedasticity
##########################################################################################
# - lets consider the presence of heteroscedasticity. 
# - in time series, there is what we call ARCH effects which are essentially clustered volatility areas in a time series.
for(a in 1:length(var_model_merged)){
  arch <- arch.test(var_model_merged[[a]], lags.multi = 7, multivariate.only = TRUE)
  # If p >= 0.05: The null hypothesis of no heteroscedasticity is rejected since the p-value 
  # of X is lower than the significance level alpha of 0.05. 
  cat(paste0("p-value: ", arch[["arch.mul"]][["p.value"]], "\n"))
}
# Model Five has a p value of 0.76 so we are good since tis is the model we are working with.
##########################################################################################
# 4) Normality of the distribution
##########################################################################################
# - lets consider normality of the distribution (this is only a soft pre-requisite but a desirable one) 
for(a in 1:length(var_model_merged)){
  norm <- normality.test(var_model_merged[[a]], multivariate.only = TRUE)
  # If p >= 0.05: The null hypothesis of normality is rejected since the p-value 
  # of X is lower than the significance level alpha of 0.05. 
  cat(paste0("p-value Jaque-Bera test: ", norm[["jb.mul"]][["JB"]][["p.value"]], "\n",
             "p-value Kurtosis test: ", norm[["jb.mul"]][["Kurtosis"]][["p.value"]], "\n",
             "p-value Skewness test: ", norm[["jb.mul"]][["Skewness"]][["p.value"]], "\n"))
}
# This criteria is not met but this is fine as it is not a hard criteria!
##########################################################################################
# 5) Stability of the distribution
##########################################################################################
for(a in 1:length(var_model_merged)){
  stability <- stability(var_model_merged[[a]], type = "OLS-CUSUM")
  plot(stability)
}
# There are no points along the graphs that exceed the red lines which is good. 

